library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
#library(corrplot)
#source("~/GitHub/FRESA.CAD/R/RRPlot.R")
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('keep.trailing.zeros',TRUE)

RRPLOTS and flchain

odata <- flchain
odata$chapter <- NULL
pander::pander(table(odata$death))
0 1
5705 2169
rownames(odata) <- c(1:nrow(odata))
data <- as.data.frame(model.matrix(Surv(futime,death)~.,odata))

data$`(Intercept)` <- NULL

dataFL <- as.data.frame(cbind(time=odata[rownames(data),"futime"],
                              status=odata[rownames(data),"death"],
                              data))
pander::pander(table(dataFL$status))
0 1
4562 1962
dataFL$time <- dataFL$time/365

Exploring Raw Features with RRPlot

convar <- colnames(dataFL)[lapply(apply(dataFL,2,unique),length) > 10]
convar <- convar[convar != "time"]
topvar <- univariate_BinEnsemble(dataFL[,c("status",convar)],"status")
pander::pander(topvar)
age kappa lambda creatinine
0 0 0 0
topv <- min(5,length(topvar))
topFive <- names(topvar)[1:topv]

topFeature <- RRPlot(cbind(dataFL$status,dataFL[,topFive[1]]),
                  title=topFive[1])

  par(op)

## With Survival Analysis
RRanalysis <- list();
idx <- 1
for (topf in topFive)
{
  RRanalysis[[idx]] <- RRPlot(cbind(dataFL$status,dataFL[,topf]),
                  timetoEvent=dataFL$time,
                  atRate=c(0.90,0.80),
                  title=topf)
  idx <- idx + 1
  par(op)
}

names(RRanalysis) <- topFive

Reporting the Metrics

pander::pander(t(RRanalysis[[1]]$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100
Thr 73.000 69.000 68.000 54.000 50.00000
RR 4.013 4.399 4.465 5.668 1.00000
RR_LCI 3.740 4.045 4.099 4.559 0.00000
RR_UCI 4.305 4.783 4.864 7.048 0.00000
SEN 0.581 0.713 0.726 0.960 1.00000
SPE 0.883 0.790 0.779 0.255 0.00614
BACC 0.732 0.752 0.753 0.607 0.50307
ROCAUC <- NULL
CstatCI <- NULL
RRatios <- NULL
LogRangp <- NULL
Sensitivity <- NULL
Specificity <- NULL

for (topf in topFive)
{
  CstatCI <- rbind(CstatCI,RRanalysis[[topf]]$c.index$cstatCI)
  RRatios <- rbind(RRatios,RRanalysis[[topf]]$RR_atP)
  LogRangp <- rbind(LogRangp,RRanalysis[[topf]]$surdif$pvalue)
  Sensitivity <- rbind(Sensitivity,RRanalysis[[topf]]$ROCAnalysis$sensitivity)
  Specificity <- rbind(Specificity,RRanalysis[[topf]]$ROCAnalysis$specificity)
  ROCAUC <- rbind(ROCAUC,RRanalysis[[topf]]$ROCAnalysis$aucs)
}
rownames(CstatCI) <- topFive
rownames(LogRangp) <- topFive
rownames(Sensitivity) <- topFive
rownames(Specificity) <- topFive
rownames(ROCAUC) <- topFive

pander::pander(ROCAUC)
  est lower upper
age 0.822 0.811 0.834
kappa 0.682 0.667 0.697
lambda 0.665 0.650 0.680
creatinine 0.590 0.574 0.606
pander::pander(CstatCI)
  mean.C Index median lower upper
age 0.775 0.775 0.765 0.785
kappa 0.671 0.671 0.658 0.683
lambda 0.657 0.657 0.645 0.669
creatinine 0.586 0.586 0.572 0.601
pander::pander(LogRangp)
age 0.00e+00
kappa 4.90e-175
lambda 4.41e-145
creatinine 2.67e-67
pander::pander(Sensitivity)
  est lower upper
age 0.581 0.558 0.602
kappa 0.319 0.298 0.340
lambda 0.300 0.279 0.321
creatinine 0.288 0.269 0.309
pander::pander(Specificity)
  est lower upper
age 0.883 0.873 0.892
kappa 0.900 0.891 0.908
lambda 0.899 0.890 0.907
creatinine 0.865 0.854 0.875
meanMatrix <- cbind(ROCAUC[,1],CstatCI[,1],Sensitivity[,1],Specificity[,1])
colnames(meanMatrix) <- c("ROCAUC","C-Stat","Sen","Spe")
pander::pander(meanMatrix)
  ROCAUC C-Stat Sen Spe
age 0.822 0.775 0.581 0.883
kappa 0.682 0.671 0.319 0.900
lambda 0.665 0.657 0.300 0.899
creatinine 0.590 0.586 0.288 0.865

Train Test Set

trainsamples <- sample(nrow(dataFL),0.5*nrow(dataFL))
dataFLTrain <- dataFL[trainsamples,]
dataFLTest <- dataFL[-trainsamples,]


pander::pander(table(dataFLTrain$status))
0 1
2231 1031
pander::pander(table(dataFLTest$status))
0 1
2331 931

Cox Modeling

ml <- BSWiMS.model(Surv(time,status)~.,data=dataFLTrain,loops=0)
sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
  Estimate lower mean upper u.Accuracy
age 0.0923 0.0689 0.0753 0.0814 0.723
flc.grp 0.0669 0.0035 0.0441 0.0896 0.603
kappa 0.1598 0.0781 0.2152 0.3564 0.659
creatinine 0.0284 -0.1185 0.0551 0.2173 0.551
Table continues below
  r.Accuracy full.Accuracy u.AUC r.AUC full.AUC
age 0.605 0.728 0.737 0.625 0.744
flc.grp 0.720 0.728 0.625 0.737 0.744
kappa 0.725 0.728 0.633 0.741 0.744
creatinine 0.728 0.728 0.545 0.744 0.744
  IDI NRI z.IDI z.NRI Delta.AUC Frequency
age 0.183264 0.876 26.82 26.38 -0.11905 1
flc.grp 0.005685 0.283 5.06 7.65 -0.00739 1
kappa 0.001412 0.057 2.38 1.51 -0.00299 1
creatinine 0.000135 0.049 1.94 1.30 0.00000 1

Cox Model Performance

The evaluation of the raw Cox model with RRPlot()

timeinterval <- 5

h0 <- sum(dataFLTrain$status & dataFLTrain$time <= timeinterval)
h0 <- h0/sum((dataFLTrain$time > timeinterval) | (dataFLTrain$status==1))

pander::pander(t(c(h0=h0,timeinterval=timeinterval)),caption="Initial Parameters")
Initial Parameters
h0 timeinterval
0.139 5
index <- predict(ml,dataFLTrain)
rdata <- cbind(dataFLTrain$status,ppoisGzero(index,h0))

rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataFLTrain$time,
                     title="Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Time to event

toinclude <- rdata[,1]==1 
obstiemToEvent <- dataFLTrain[,"time"]
summary(obstiemToEvent)

Min. 1st Qu. Median Mean 3rd Qu. Max. 0.000 7.774 11.736 9.946 13.071 14.153

tmin<-min(obstiemToEvent)
if (tmin < 0.01) tmin <- 0.01
tmax<-max(obstiemToEvent)
sum(toinclude)

[1] 1031

timetoEvent <- meanTimeToEvent(rdata[,2],timeinterval)
timetoEvent[is.infinite(timetoEvent)] <- 3*tmax
timetoEvent[timetoEvent > 3*tmax] <- 3*tmax
timetoEvent[timetoEvent < tmin] <- tmin

lmfit <- lm(obstiemToEvent[toinclude]~0+timetoEvent[toinclude])
sm <- summary(lmfit)
pander::pander(sm)
  Estimate Std. Error t value Pr(>|t|)
timetoEvent[toinclude] 0.259 0.00999 26 1.08e-114
Fitting linear model: obstiemToEvent[toinclude] ~ 0 + timetoEvent[toinclude]
Observations Residual Std. Error \(R^2\) Adjusted \(R^2\)
1031 5.43 0.395 0.395
plot(timetoEvent,obstiemToEvent,
     col=1+rdata[,1],
     xlab="Expected time",
     ylab="Observed time",
     main="Expected vs. Observed",
     xlim=c(tmin,tmax),
     ylim=c(tmin,tmax),
     log="xy")
lines(x=c(tmin,tmax),y=lmfit$coefficients*c(tmin,tmax),lty=1,col="blue")
txt <- bquote(paste(R^2 == .(round(sm$r.squared,3))))
text(tmin+0.005*(tmax-tmin),tmax,txt,cex=0.7)
text(tmin+0.015*(tmax-tmin),tmax,sprintf("Slope=%4.3f",sm$coefficients[1]),cex=0.7)
legend("bottomright",legend=c("No Event","Event","Linear fit"),
             pch=c(1,1,-1),
             col=c(1,2,"blue"),
             lty=c(-1,-1,1)
             )

MADerror2 <- mean(abs(timetoEvent[toinclude]-obstiemToEvent[toinclude]))
pander::pander(MADerror2)

8.61

Cox Calibration

op <- par(no.readonly = TRUE)


calprob <- CoxRiskCalibration(ml,dataFLTrain,"status","time",timeInterval=timeinterval)


pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.278 1.99 14.4

The RRplot() of the calibrated model

h0 <- calprob$h0
timeinterval <- calprob$timeInterval;

rdata <- cbind(dataFLTrain$status,calprob$prob)


rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataFLTrain$time,
                     title="Cal. Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Time to event after calibration

timetoEvent <- meanTimeToEvent(rdata[,2],timeinterval)
tmax<-max(c(obstiemToEvent))
lmfit <- lm(obstiemToEvent[toinclude]~0+timetoEvent[toinclude])
sm <- summary(lmfit)
pander::pander(sm)
  Estimate Std. Error t value Pr(>|t|)
timetoEvent[toinclude] 0.133 0.006 22.1 3.44e-89
Fitting linear model: obstiemToEvent[toinclude] ~ 0 + timetoEvent[toinclude]
Observations Residual Std. Error \(R^2\) Adjusted \(R^2\)
1031 5.75 0.323 0.322
plot(timetoEvent,obstiemToEvent,
     col=1+rdata[,1],
     xlab="Expected time",
     ylab="Observed time",
     main="Expected vs. Observed",
     xlim=c(tmin,tmax),
     ylim=c(tmin,tmax),
     log="xy")
lines(x=c(tmin,tmax),y=lmfit$coefficients*c(tmin,tmax),lty=1,col="blue")
txt <- bquote(paste(R^2 == .(round(sm$r.squared,3))))
text(tmin+0.005*(tmax-tmin),tmax,txt,cex=0.7)
text(tmin+0.015*(tmax-tmin),tmax,sprintf("Slope=%4.3f",sm$coefficients[1]),cex=0.7)
legend("bottomright",legend=c("No Event","Event","Linear fit"),
             pch=c(1,1,-1),
             col=c(1,2,"blue"),
             lty=c(-1,-1,1)
             )

MADerror2 <- c(MADerror2,mean(abs(timetoEvent[toinclude]-obstiemToEvent[toinclude])))
pander::pander(MADerror2)

8.61 and 14.45